# --- imports used by the functions below ---
# note: UT (project-local utility module providing KayDE, bar_plot, fig_dir, dat_dir),
# NG (project-local module providing the mock observables, whitening, ICA, and the
# kNN divergence estimators), and the _PDF wrapper class are assumed to be
# imported/defined elsewhere in this repo.
import os
import time
import warnings
import numpy as np
import scipy as sp
import scipy.linalg  # needed so that sp.linalg.pinv below resolves
import matplotlib.pyplot as plt
from numpy.random import multivariate_normal as mvn
from sklearn.mixture import GaussianMixture as GMix
from sklearn.neighbors import KernelDensity as skKDE
from sklearn.model_selection import GridSearchCV


def lnp_Xw(X_w, x=None, method='gmm', n_comp_max=10, info_crit='bic', njobs=1):
    ''' Estimate the multi-dimensional log pdf at x for a given X_w using a
    nonparametric density estimate (KDE or GMM).
    '''
    if x is None:
        raise ValueError("specify x, the values at which to evaluate the pdf")
    if method not in ['kde', 'gkde', 'gmm']:
        raise ValueError("method = 'kde', 'gkde', or 'gmm'")

    if method == 'gmm':
        # find the best-fit number of components using an information criterion (BIC/AIC)
        gmms, ics = [], []
        for i_comp in range(1, n_comp_max + 1):
            gmm = GMix(n_components=i_comp)
            gmm.fit(X_w)
            gmms.append(gmm)
            if info_crit == 'bic':  # Bayesian Information Criterion
                ics.append(gmm.bic(X_w))
            elif info_crit == 'aic':  # Akaike Information Criterion
                ics.append(gmm.aic(X_w))
        ibest = np.array(ics).argmin()  # lower is better!
        kern = gmms[ibest]
    elif method == 'kde':
        # Scott's rule KDE
        kern = UT.KayDE(X_w)
    elif method == 'gkde':
        # find the best-fit bandwidth using a cross-validation grid search
        grid = GridSearchCV(skKDE(),
                            {'bandwidth': np.linspace(0.1, 1.0, 30)},
                            cv=10, n_jobs=njobs)  # 10-fold cross-validation
        grid.fit(X_w)
        kern = grid.best_estimator_

    if len(x.shape) == 1:
        return kern.score_samples(x[:, None])
    else:
        return kern.score_samples(x)

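
# The sketch below is a hypothetical usage example (the `_example_lnp_Xw` name and the
# synthetic inputs are not part of the original module): it evaluates the GMM estimate
# of ln p(x) for a whitened toy sample.
def _example_lnp_Xw():
    ''' illustrative only: evaluate lnp_Xw on a synthetic whitened sample '''
    X_w = np.random.randn(500, 3)       # stand-in for a whitened mock sample
    x_eval = np.random.randn(10, 3)     # points at which to evaluate ln p
    lnp = lnp_Xw(X_w, x=x_eval, method='gmm', n_comp_max=5)
    return lnp                          # (10,) array of log-densities
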
def sfr_mstar_gmm(logmstar, logsfr, n_comp_max=30, silent=False):
    ''' Fit a 2D Gaussian mixture model to the log(M*) and log(SFR) sample of galaxies.
    '''
    # only keep sensible logM* and logSFR values
    sense = ((logmstar > 0.) & (logmstar < 13.) &
             (logsfr > -5.) & (logsfr < 4.) & ~np.isnan(logsfr))
    if (len(logmstar) - np.sum(sense) > 0) and not silent:
        warnings.warn('%i galaxies have nonsensical logM* or logSFR values' %
                      (len(logmstar) - np.sum(sense)))
    logmstar = logmstar[sense]
    logsfr = logsfr[sense]

    X = np.array([logmstar, logsfr]).T  # (n_sample, n_features)

    gmms, bics = [], []
    for n in range(1, n_comp_max):
        gmm = GMix(n_components=n)
        gmm.fit(X)
        gmms.append(gmm)
        bics.append(gmm.bic(X))  # Bayesian Information Criterion
    ibest = np.array(bics).argmin()  # lower is better!
    gbest = gmms[ibest]

    if not silent:
        print('%i components' % len(gbest.means_))
    return gbest

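
# Hypothetical usage sketch (the `_example_sfr_mstar_gmm` name and the synthetic
# star-forming sequence below are illustrative, not data from this project): fit the
# 2D GMM to a toy logM*-logSFR sample and inspect the best-fit components.
def _example_sfr_mstar_gmm():
    ''' illustrative only: fit sfr_mstar_gmm to a synthetic logM*-logSFR sample '''
    logm = np.random.uniform(9., 11.5, size=2000)                   # toy stellar masses
    logsfr = (logm - 10.) + np.random.normal(0., 0.3, size=2000)    # toy SFS with scatter
    gbest = sfr_mstar_gmm(logm, logsfr, n_comp_max=5)
    return gbest.means_, gbest.covariances_
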
def _GMMfit_bins(self, logmstar, logsfr, max_comp=3):
    ''' Fit GMM components to p(SSFR) of the given data in each stellar mass bin
    and return the best fits.
    '''
    n_bin = self._mbins.shape[0]  # number of stellar mass bins
    assert n_bin > 0, 'no mass bins'

    # sort logM* into M* bins
    i_bins = np.digitize(logmstar, np.append(self._mbins[:, 0], self._mbins[-1, 1]))
    i_bins -= 1

    bin_mid, gbests, nbests, _gmms, _bics = [], [], [], [], []
    # fit GMM to p(SSFR) in each logM* bin
    for i in range(n_bin):
        # skip bins that do not have enough galaxies
        if not self._has_nbinthresh[i]:
            continue
        in_bin = (i_bins == i)
        x = logsfr[in_bin] - logmstar[in_bin]  # log SSFRs
        x = np.reshape(x, (-1, 1))

        bin_mid.append(np.median(logmstar[in_bin]))

        # fit GMMs with a range of components
        ncomps = range(1, max_comp + 1)
        gmms, bics = [], []
        for n in ncomps:
            gmm = GMix(n_components=n)
            gmm.fit(x)
            bics.append(gmm.bic(x))  # Bayesian Information Criterion
            gmms.append(gmm)

        # components with the lowest BIC (preferred)
        i_best = np.array(bics).argmin()
        n_best = ncomps[i_best]  # number of components of the best fit
        gbest = gmms[i_best]     # best-fit GMM

        # save the best GMM, all the GMMs, and the BICs
        nbests.append(n_best)
        gbests.append(gbest)
        _gmms.append(gmms)
        _bics.append(bics)

    assert len(bin_mid) > 0, 'no mass bin has enough galaxies'
    if bin_mid[0] > 10.:
        warnings.warn("The lowest M* bin is greater than 10^10; "
                      "this may compromise the SFS identification scheme")
    return bin_mid, gbests, nbests, _gmms, _bics

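
# A minimal sketch (illustrative values, not from this project) of the np.digitize
# binning used in _GMMfit_bins above: the bin edges are the lower edges of self._mbins
# plus the upper edge of the last bin, so after subtracting 1 galaxies below the first
# bin get index -1 and galaxies above the last bin get index n_bin, both of which fall
# outside the valid range 0 ... n_bin-1.
def _example_mass_binning():
    ''' illustrative only: how logM* values map onto mass-bin indices '''
    mbins = np.array([[9.0, 9.5], [9.5, 10.0], [10.0, 10.5]])  # hypothetical M* bins
    edges = np.append(mbins[:, 0], mbins[-1, 1])               # [9.0, 9.5, 10.0, 10.5]
    logm = np.array([8.8, 9.2, 9.7, 10.2, 10.9])
    i_bins = np.digitize(logm, edges) - 1                      # [-1, 0, 1, 2, 3]
    return i_bins
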
def _fit_pdf(samples, method='kde', range=None, debug=False, **method_kwargs):
    ''' Fit the probability distribution traced by the given samples using the method
    specified by `method`. This function is designed to fit p(F(theta)), the probability
    distribution of *derived* properties; however, it works for any PDF.

    Parameters
    ----------
    samples : 2d array
        Nsample x Ndim array of samples from the PDF
    method : string
        which method to use for estimating the PDF. Currently supports 'kde' and 'gmm'
        (default: 'kde')
    '''
    if debug:
        print('... fitting pdf using %s' % method)

    # whiten samples
    avg_samples = np.mean(samples, axis=0)
    std_samples = np.std(samples, axis=0)
    samples_w = (samples - avg_samples) / std_samples

    if method == 'kde':
        # fit PDF using Kernel Density Estimation
        #from scipy.stats import gaussian_kde as gkde
        from sklearn.neighbors import KernelDensity
        pdf_fit = KernelDensity(kernel='gaussian', **method_kwargs).fit(samples_w)
    else:
        from sklearn.mixture import GaussianMixture as GMix
        if 'n_comp' not in method_kwargs.keys():
            raise ValueError("specify number of Gaussians `n_comp` in kwargs")
        gmm = GMix(n_components=method_kwargs['n_comp'])
        gmm.fit(samples_w)
        pdf_fit = gmm

    return _PDF(pdf_fit, method=method, range=range, avg=avg_samples, std=std_samples)

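
# Hypothetical usage sketch (the `_example_fit_pdf` name and the synthetic sample are
# illustrative; the returned objects are instances of the module's `_PDF` wrapper,
# which is assumed to be defined elsewhere in this file): fit a KDE and a GMM to a
# whitened 2D toy sample.
def _example_fit_pdf():
    ''' illustrative only: fit _fit_pdf to a synthetic 2D sample '''
    samples = np.random.multivariate_normal([0., 1.], [[1., 0.5], [0.5, 2.]], size=5000)
    pdf_kde = _fit_pdf(samples, method='kde', bandwidth=0.2)  # KDE with fixed bandwidth
    pdf_gmm = _fit_pdf(samples, method='gmm', n_comp=2)       # GMM with 2 components
    return pdf_kde, pdf_gmm
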
def _GMMfit_bins_nbest(self, logmstar, logsfr, nbests):
    ''' Fit GMMs with the specified numbers of components, `nbests`, to p(SSFR) of the
    given data in each stellar mass bin and return the fits.
    '''
    n_bin = self._mbins.shape[0]

    i_bins = np.digitize(logmstar, np.append(self._mbins[:, 0], self._mbins[-1, 1]))
    i_bins -= 1

    gmms = []
    ii = 0
    for i in range(n_bin):
        # skip bins that do not have enough galaxies
        if not self._has_nbinthresh[i]:
            continue
        in_bin = (i_bins == i)
        x = logsfr[in_bin] - logmstar[in_bin]  # log SSFRs
        x = np.reshape(x, (-1, 1))

        gmm = GMix(n_components=nbests[ii])
        gmm.fit(x)
        # save the GMM fit for this mass bin
        gmms.append(gmm)
        ii += 1
    return gmms

def divGMF(div_func='kl', Nref=1000, K=5, n_mc=10, n_comp_max=10, n_mocks=2000):
    ''' Compare the divergence estimates between
    D( gauss(C_gmf) || gauss(C_gmf) ), D( gmfs || gauss(C_gmf) ),
    D( gmfs || p(gmfs) KDE), D( gmfs || p(gmfs) GMM),
    D( gmfs || PI p(gmfs^i_ICA) KDE), and D( gmfs || PI p(gmfs^i_ICA) GMM).
    '''
    if isinstance(Nref, float):
        Nref = int(Nref)

    # read in mock GMFs from all HOD realizations (20,000 mocks)
    gmfs_mock = NG.X_gmf_all()[:n_mocks]
    n_mock = gmfs_mock.shape[0]  # number of mocks
    print("%i mocks" % n_mock)

    gmfs_mock_meansub, _ = NG.meansub(gmfs_mock)  # mean subtract
    X_w, W = NG.whiten(gmfs_mock_meansub)
    X_ica, _ = NG.Ica(X_w)  # ICA transformation

    C_gmf = np.cov(X_w.T)  # covariance matrix

    # p(gmfs) GMM
    gmms, bics = [], []
    for i_comp in range(1, n_comp_max + 1):
        gmm = GMix(n_components=i_comp)
        gmm.fit(X_w)
        gmms.append(gmm)
        bics.append(gmm.bic(X_w))
    ibest = np.array(bics).argmin()
    kern_gmm = gmms[ibest]

    # p(gmfs) KDE
    t0 = time.time()
    grid = GridSearchCV(skKDE(),
                        {'bandwidth': np.linspace(0.1, 1.0, 30)},
                        cv=10)  # 10-fold cross-validation
    grid.fit(X_w)
    kern_kde = grid.best_estimator_
    dt = time.time() - t0
    print('%f sec' % dt)

    # PI p(gmfs^i_ICA) GMM
    kern_gmm_ica = []
    for ibin in range(X_ica.shape[1]):
        gmms, bics = [], []
        for i_comp in range(1, n_comp_max + 1):
            gmm = GMix(n_components=i_comp)
            gmm.fit(X_ica[:, ibin][:, None])
            gmms.append(gmm)
            bics.append(gmm.bic(X_ica[:, ibin][:, None]))
        ibest = np.array(bics).argmin()
        kern_gmm_ica.append(gmms[ibest])

    # PI p(gmfs^i_ICA) KDE
    kern_kde_ica = []
    for ibin in range(X_ica.shape[1]):
        t0 = time.time()
        grid = GridSearchCV(skKDE(),
                            {'bandwidth': np.linspace(0.1, 1.0, 30)},
                            cv=10)  # 10-fold cross-validation
        grid.fit(X_ica[:, ibin][:, None])
        kern_kde_ica.append(grid.best_estimator_)
        dt = time.time() - t0
        print('%f sec' % dt)

    # calculate the divergences now
    div_gauss_ref, div_gauss = [], []
    div_gmm, div_gmm_ica = [], []
    div_kde, div_kde_ica = [], []
    for i in range(n_mc):
        print('%i montecarlo' % i)
        t_start = time.time()
        # reference divergence in order to showcase the estimator's scatter:
        # Gaussian distribution described by C_gmf with the same n_mock mocks
        gauss = mvn(np.zeros(gmfs_mock.shape[1]), C_gmf, size=n_mock)
        div_gauss_ref_i = NG.kNNdiv_gauss(gauss, C_gmf, Knn=K, div_func=div_func, Nref=Nref)
        div_gauss_ref.append(div_gauss_ref_i)

        # estimate divergence between the whitened gmfs and a
        # Gaussian distribution described by C_gmf
        div_gauss_i = NG.kNNdiv_gauss(X_w, C_gmf, Knn=K, div_func=div_func, Nref=Nref)
        div_gauss.append(div_gauss_i)

        # D( gmfs || p(gmfs) GMM )
        div_gmm_i = NG.kNNdiv_Kernel(X_w, kern_gmm, Knn=K, div_func=div_func,
                                     Nref=Nref, compwise=False)
        div_gmm.append(div_gmm_i)

        # D( gmfs || p(gmfs) KDE )
        div_kde_i = NG.kNNdiv_Kernel(X_w, kern_kde, Knn=K, div_func=div_func,
                                     Nref=Nref, compwise=False)
        div_kde.append(div_kde_i)

        # D( gmfs || PI p(gmfs^i_ICA) GMM )
        div_gmm_ica_i = NG.kNNdiv_Kernel(X_ica, kern_gmm_ica, Knn=K, div_func=div_func,
                                         Nref=Nref, compwise=True)
        div_gmm_ica.append(div_gmm_ica_i)

        # D( gmfs || PI p(gmfs^i_ICA) KDE )
        div_kde_ica_i = NG.kNNdiv_Kernel(X_ica, kern_kde_ica, Knn=K, div_func=div_func,
                                         Nref=Nref, compwise=True)
        div_kde_ica.append(div_kde_ica_i)
        print('t= %f sec' % round(time.time() - t_start, 2))

    fig = plt.figure(figsize=(10, 5))
    sub = fig.add_subplot(111)
    hrange = [-0.15, 0.6]
    nbins = 50
    divs = [div_gauss_ref, div_gauss, div_gmm, div_kde, div_gmm_ica, div_kde_ica]
    labels = ['Ref.',
              r'$D(\{\zeta_i^{(m)}\}\parallel \mathcal{N}({\bf C}^{(m)}))$',
              r'$D(\{\zeta^{(m)}\}\parallel p_\mathrm{GMM}(\{\zeta^{m}\}))$',
              r'$D(\{\zeta^{(m)}\}\parallel p_\mathrm{KDE}(\{\zeta^{m}\}))$',
              r'$D(\{\zeta_\mathrm{ICA}^{(m)}\}\parallel \prod_{i} p^\mathrm{GMM}(\{\zeta_{i, \mathrm{ICA}}^{m}\}))$',
              r'$D(\{\zeta_\mathrm{ICA}^{(m)}\}\parallel \prod_{i} p^\mathrm{KDE}(\{\zeta_{i, \mathrm{ICA}}^{m}\}))$']
    y_max = 0.
    for div, lbl in zip(divs, labels):
        hh = np.histogram(np.array(div), density=True, range=hrange, bins=nbins)
        bp = UT.bar_plot(*hh)
        sub.fill_between(bp[0], np.zeros(len(bp[0])), bp[1],
                         edgecolor='none', alpha=0.5, label=lbl)
        y_max = max(y_max, bp[1].max())
        if (np.average(div) < hrange[0]) or (np.average(div) > hrange[1]):
            print('divergence of %s (%f) is outside range' % (lbl, np.average(div)))
    sub.set_xlim(hrange)
    sub.set_ylim([0., y_max * 1.2])
    sub.legend(loc='upper left', prop={'size': 15})
    # x-axis label
    if 'renyi' in div_func:
        alpha = float(div_func.split(':')[-1])
        sub.set_xlabel(r'Renyi-$\alpha=' + str(alpha) + '$ divergence', fontsize=20)
    elif 'kl' in div_func:
        sub.set_xlabel(r'KL divergence', fontsize=20)

    if 'renyi' in div_func:
        str_div = 'renyi' + str(alpha)
    elif div_func == 'kl':
        str_div = 'kl'
    f_fig = ''.join([UT.fig_dir(), 'tests/kNN_divergence.gmf.K', str(K), '.',
                     str(n_mocks), '.', str_div, '.png'])
    fig.savefig(f_fig, bbox_inches='tight')
    return None

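
# Hypothetical invocation sketch for divGMF (illustrative only; it assumes the NG mock
# GMF catalogs and the UT.fig_dir() output directory are available). The `div_func`
# string is either 'kl' or 'renyi:<alpha>'; e.g. 'renyi:.5' is parsed above as
# alpha = float('renyi:.5'.split(':')[-1]) = 0.5.
#
#   divGMF(div_func='renyi:.5', Nref=1000, K=5, n_mc=10, n_comp_max=10, n_mocks=2000)
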
def diverge(obvs, diver, div_func='kl', Nref=1000, K=5, n_mc=10, n_comp_max=10,
            n_mocks=20000, pk_mock='patchy.z1', NorS='ngc', njobs=1):
    ''' Calculate the divergences:
    - D( gauss(C_X) || gauss(C_X) )
    - D( mock X || gauss(C_X) )
    - D( mock X || p(X) KDE )
    - D( mock X || p(X) GMM )
    - D( mock X || PI p(X^i_ICA) KDE )
    - D( mock X || PI p(X^i_ICA) GMM )
    '''
    if isinstance(Nref, float):
        Nref = int(Nref)
    if diver not in ['ref', 'pX_gauss', 'pX_gauss_hartlap',
                     'pX_GMM', 'pX_GMM_ref', 'pX_KDE', 'pX_KDE_ref',
                     'pX_scottKDE', 'pX_scottKDE_ref',
                     'pXi_ICA_GMM', 'pXi_ICA_GMM_ref',
                     'pXi_parICA_GMM', 'pXi_parICA_GMM_ref',
                     'pXi_ICA_KDE', 'pXi_ICA_KDE_ref',
                     'pXi_parICA_KDE', 'pXi_parICA_KDE_ref',
                     'pXi_ICA_scottKDE', 'pXi_ICA_scottKDE_ref',
                     'pXi_parICA_scottKDE', 'pXi_parICA_scottKDE_ref']:
        raise ValueError("unsupported diver = %s" % diver)

    str_obvs = ''
    if obvs == 'pk':
        str_obvs = '.' + NorS
    if 'renyi' in div_func:
        alpha = float(div_func.split(':')[-1])
        str_div = 'renyi' + str(alpha)
    elif div_func == 'kl':
        str_div = 'kl'
    str_comp = ''
    if 'GMM' in diver:
        str_comp = '.ncomp' + str(n_comp_max)

    f_dat = ''.join([UT.dat_dir(), 'diverg/',
                     'diverg.', obvs, str_obvs, '.', diver, '.K', str(K), str_comp,
                     '.Nref', str(Nref), '.', str_div, '.dat'])
    if not os.path.isfile(f_dat):
        print('-- writing to -- \n %s' % f_dat)
        f_out = open(f_dat, 'w')  # create the output file
        f_out.close()
    else:
        print('-- appending to -- \n %s' % f_dat)

    # read in mock data X
    if obvs == 'pk':
        X_mock = NG.X_pk_all(pk_mock, NorS=NorS, sys='fc')
    elif obvs == 'gmf':
        if n_mocks is not None:
            X_mock = NG.X_gmf_all()[:n_mocks]
        else:
            X_mock = NG.X_gmf_all()
    else:
        raise ValueError("obvs = 'pk' or 'gmf'")
    n_mock = X_mock.shape[0]  # number of mocks
    print("%i mocks" % n_mock)

    X_mock_meansub, _ = NG.meansub(X_mock)  # mean subtract
    X_w, W = NG.whiten(X_mock_meansub)

    if '_ICA' in diver:
        X_ica, W_ica = NG.Ica(X_w)  # ICA transformation
        W_ica_inv = sp.linalg.pinv(W_ica.T)
    elif '_parICA' in diver:
        # FastICA transformation using the parallel algorithm
        X_ica, W_ica = NG.Ica(X_w, algorithm='parallel')
        W_ica_inv = sp.linalg.pinv(W_ica.T)

    if diver in ['pX_gauss', 'ref']:
        C_X = np.cov(X_w.T)  # covariance matrix
    elif diver in ['pX_gauss_hartlap']:
        C_X = np.cov(X_w.T)  # covariance matrix
        f_hartlap = (n_mock - float(X_mock.shape[1]) - 2.) / (n_mock - 1.)
        print("hartlap factor = %f" % f_hartlap)
        C_X = C_X / f_hartlap  # scale covariance matrix by the Hartlap factor
    elif diver in ['pX_GMM', 'pX_GMM_ref']:
        # p(mock X) GMM
        gmms, bics = [], []
        for i_comp in range(1, n_comp_max + 1):
            gmm = GMix(n_components=i_comp)
            gmm.fit(X_w)
            gmms.append(gmm)
            bics.append(gmm.bic(X_w))
        ibest = np.array(bics).argmin()
        kern_gmm = gmms[ibest]
    elif diver in ['pX_KDE', 'pX_KDE_ref']:
        # p(mock X) KDE with the bandwidth chosen by cross-validation
        t0 = time.time()
        grid = GridSearchCV(skKDE(),
                            {'bandwidth': np.linspace(0.1, 1.0, 30)},
                            cv=10, n_jobs=njobs)  # 10-fold cross-validation
        grid.fit(X_w)
        kern_kde = grid.best_estimator_
        dt = time.time() - t0
        print('%f sec' % dt)
    elif diver in ['pX_scottKDE', 'pX_scottKDE_ref']:
        # p(mock X) KDE with Scott's rule bandwidth
        t0 = time.time()
        kern_kde = UT.KayDE(X_w)
        dt = time.time() - t0
        print('%f sec' % dt)
    elif diver in ['pXi_ICA_GMM', 'pXi_ICA_GMM_ref', 'pXi_parICA_GMM', 'pXi_parICA_GMM_ref']:
        # PI p(X^i_ICA) GMM
        kern_gmm_ica = []
        for ibin in range(X_ica.shape[1]):
            gmms, bics = [], []
            for i_comp in range(1, n_comp_max + 1):
                gmm = GMix(n_components=i_comp)
                gmm.fit(X_ica[:, ibin][:, None])
                gmms.append(gmm)
                bics.append(gmm.bic(X_ica[:, ibin][:, None]))
            ibest = np.array(bics).argmin()
            kern_gmm_ica.append(gmms[ibest])
    elif diver in ['pXi_ICA_KDE', 'pXi_ICA_KDE_ref', 'pXi_parICA_KDE', 'pXi_parICA_KDE_ref']:
        # PI p(X^i_ICA) KDE with cross-validated bandwidths
        kern_kde_ica = []
        for ibin in range(X_ica.shape[1]):
            t0 = time.time()
            grid = GridSearchCV(skKDE(),
                                {'bandwidth': np.linspace(0.1, 1.0, 30)},
                                cv=10, n_jobs=njobs)  # 10-fold cross-validation
            grid.fit(X_ica[:, ibin][:, None])
            kern_kde_ica.append(grid.best_estimator_)
            dt = time.time() - t0
            print('%f sec' % dt)
    elif diver in ['pXi_ICA_scottKDE', 'pXi_ICA_scottKDE_ref',
                   'pXi_parICA_scottKDE', 'pXi_parICA_scottKDE_ref']:
        # PI p(X^i_ICA) KDE with Scott's rule bandwidths
        kern_kde_ica = []
        for ibin in range(X_ica.shape[1]):
            kern_kde_i = UT.KayDE(X_ica[:, ibin])
            kern_kde_ica.append(kern_kde_i)

    # calculate the divergences now
    divs = []
    for i in range(n_mc):
        print('%i montecarlo' % i)
        t0 = time.time()
        if diver in ['pX_gauss', 'pX_gauss_hartlap']:
            # estimate divergence between the whitened mocks and a
            # Gaussian distribution described by C_X
            div_i = NG.kNNdiv_gauss(X_w, C_X, Knn=K, div_func=div_func, Nref=Nref,
                                    njobs=njobs)
        elif diver == 'ref':
            # reference divergence in order to showcase the estimator's scatter:
            # Gaussian distribution described by C_X with the same n_mock mocks
            gauss = mvn(np.zeros(X_mock.shape[1]), C_X, size=n_mock)
            div_i = NG.kNNdiv_gauss(gauss, C_X, Knn=K, div_func=div_func, Nref=Nref,
                                    njobs=njobs)
        elif diver == 'pX_GMM':
            # D( mock X || p(X) GMM )
            div_i = NG.kNNdiv_Kernel(X_w, kern_gmm, Knn=K, div_func=div_func,
                                     Nref=Nref, compwise=False, njobs=njobs)
        elif diver == 'pX_GMM_ref':
            # D( sample from p(X) GMM || p(X) GMM )
            samp = kern_gmm.sample(n_mock)
            div_i = NG.kNNdiv_Kernel(samp[0], kern_gmm, Knn=K, div_func=div_func,
                                     Nref=Nref, compwise=False, njobs=njobs)
        elif diver in ['pX_KDE', 'pX_scottKDE']:
            # D( mock X || p(X) KDE )
            div_i = NG.kNNdiv_Kernel(X_w, kern_kde, Knn=K, div_func=div_func,
                                     Nref=Nref, compwise=False, njobs=njobs)
            divs.append(div_i)
        elif diver in ['pX_KDE_ref', 'pX_scottKDE_ref']:
            # D( sample from p(X) KDE || p(X) KDE )
            samp = kern_kde.sample(n_mock)
            div_i = NG.kNNdiv_Kernel(samp, kern_kde, Knn=K, div_func=div_func,
                                     Nref=Nref, compwise=False, njobs=njobs)
            divs.append(div_i)
        elif diver in ['pXi_ICA_GMM', 'pXi_parICA_GMM']:
            # D( mock X || PI p(X^i_ICA) GMM )
            div_i = NG.kNNdiv_Kernel(X_w, kern_gmm_ica, Knn=K, div_func=div_func,
                                     Nref=Nref, compwise=True, njobs=njobs,
                                     W_ica_inv=W_ica_inv)
        elif diver in ['pXi_ICA_GMM_ref', 'pXi_parICA_GMM_ref']:
            # D( ref. sample || PI p(X^i_ICA) GMM )
            samp = np.zeros((n_mock, X_ica.shape[1]))
            for icomp in range(X_ica.shape[1]):
                samp_i = kern_gmm_ica[icomp].sample(n_mock)
                samp[:, icomp] = samp_i[0].flatten()
            samp = np.dot(samp, W_ica_inv.T)
            div_i = NG.kNNdiv_Kernel(samp, kern_gmm_ica, Knn=K, div_func=div_func,
                                     Nref=Nref, compwise=True, njobs=njobs,
                                     W_ica_inv=W_ica_inv)
        elif diver in ['pXi_ICA_KDE', 'pXi_ICA_scottKDE',
                       'pXi_parICA_KDE', 'pXi_parICA_scottKDE']:
            # D( mock X || PI p(X^i_ICA) KDE )
            div_i = NG.kNNdiv_Kernel(X_w, kern_kde_ica, Knn=K, div_func=div_func,
                                     Nref=Nref, compwise=True, njobs=njobs,
                                     W_ica_inv=W_ica_inv)
        elif diver in ['pXi_ICA_KDE_ref', 'pXi_ICA_scottKDE_ref',
                       'pXi_parICA_KDE_ref', 'pXi_parICA_scottKDE_ref']:
            # D( ref. sample || PI p(X^i_ICA) KDE )
            samp = np.zeros((n_mock, X_ica.shape[1]))
            for icomp in range(X_ica.shape[1]):
                samp_i = kern_kde_ica[icomp].sample(n_mock)
                samp[:, icomp] = samp_i.flatten()
            samp = np.dot(samp, W_ica_inv.T)
            div_i = NG.kNNdiv_Kernel(samp, kern_kde_ica, Knn=K, div_func=div_func,
                                     Nref=Nref, compwise=True, njobs=njobs,
                                     W_ica_inv=W_ica_inv)
        print(div_i)

        f_out = open(f_dat, 'a')
        f_out.write('%f \n' % div_i)
        f_out.close()
    return None

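
# Hypothetical invocation sketch for diverge (illustrative only; it assumes the NG mock
# catalogs and the UT.dat_dir() output directory exist). Each call appends n_mc
# divergence estimates for one observable / estimator combination to the output .dat file:
#
#   diverge('gmf', 'ref', div_func='kl', Nref=1000, K=5, n_mc=10, n_mocks=2000)
#   diverge('gmf', 'pX_GMM', div_func='renyi:.5', Nref=1000, K=5, n_mc=10,
#           n_comp_max=10, n_mocks=2000, njobs=4)
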
def lnp_Xw_i(X_w, i_bins, x=None, method='kde', n_comp_max=10, info_crit='bic', njobs=1):
    ''' Estimate the log pdf of X_w[:,i_bins] at x using a nonparametric density
    estimate (KDE or GMM).

    Parameters
    ----------
    X_w : np.ndarray
        N_sample x N_feature matrix
    i_bins : int or list of ints
        specifies the feature bin(s)
    x : np.ndarray or list of np.ndarray
        values at which to evaluate the pdf. Must be consistent with i_bins!
    '''
    if x is None:
        raise ValueError("specify x, the values at which to evaluate the pdf")
    if method not in ['kde', 'gkde', 'gmm']:
        raise ValueError("method = 'kde', 'gkde', or 'gmm'")
    if isinstance(i_bins, int):
        i_bins = [i_bins]
    if np.max(i_bins) >= X_w.shape[1] or np.min(i_bins) < 0:
        raise ValueError("i_bins must index columns of X_w")
    if len(i_bins) > 1:
        # more than one bin
        if not isinstance(x, list):
            raise ValueError("x must be a list of arrays when i_bins has multiple bins")
        elif len(i_bins) != len(x):
            raise ValueError("x must have one array per bin in i_bins")
    else:
        x = [x]

    lnpdfs = []
    for ii, i_bin in enumerate(i_bins):
        if method == 'gmm':
            # find the best-fit number of components using an information criterion (BIC/AIC)
            gmms, ics = [], []
            for i_comp in range(1, n_comp_max + 1):
                gmm = GMix(n_components=i_comp)
                gmm.fit(X_w[:, i_bin][:, None])
                gmms.append(gmm)
                if info_crit == 'bic':  # Bayesian Information Criterion
                    ics.append(gmm.bic(X_w[:, i_bin][:, None]))
                elif info_crit == 'aic':  # Akaike Information Criterion
                    ics.append(gmm.aic(X_w[:, i_bin][:, None]))
            ibest = np.array(ics).argmin()  # lower is better!
            kern = gmms[ibest]
        elif method == 'kde':
            # simple Scott's rule KDE
            kern = UT.KayDE(X_w[:, i_bin])
        elif method == 'gkde':
            # find the best-fit bandwidth using a cross-validation grid search
            t0 = time.time()
            grid = GridSearchCV(skKDE(),
                                {'bandwidth': np.linspace(0.1, 1.0, 30)},
                                cv=10, n_jobs=njobs)  # 10-fold cross-validation
            grid.fit(X_w[:, i_bin][:, None])
            kern = grid.best_estimator_
            dt = time.time() - t0
            print('%f sec' % dt)

        lnpdfs.append(kern.score_samples(x[ii][:, None]))

    if len(i_bins) == 1:
        return np.array(lnpdfs[0])
    else:
        return np.array(lnpdfs)

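
# Hypothetical usage sketch (the `_example_lnp_Xw_i` name and the synthetic inputs are
# illustrative only): evaluate the per-bin log pdfs of two columns of a whitened sample.
def _example_lnp_Xw_i():
    ''' illustrative only: per-bin log pdf estimates with lnp_Xw_i '''
    X_w = np.random.randn(500, 4)                     # stand-in whitened sample
    x_eval = [np.linspace(-2., 2., 10), np.linspace(-2., 2., 10)]
    lnp = lnp_Xw_i(X_w, [0, 2], x=x_eval, method='gmm', n_comp_max=3)
    return lnp                                        # (2, 10) array of log-densities
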