def calc_wde1(dist_code, num_obvs, sample_no, wave_name, **kwargs):
    """
    Fit one wavelet density estimator to a stored sample, then plot or report it.

    The sample is located with ``sample_name(dist_code, num_obvs, sample_no)``
    and loaded with ``read_data``; it must contain exactly ``num_obvs`` rows.

    Required keyword arguments (a missing one raises ``KeyError``):
        k        -- forwarded to ``WaveletDensityEstimator`` as ``k``
        j0       -- paired with ``wave_name`` for both entries of the wave spec
        delta_j  -- forwarded to ``WaveletDensityEstimator`` as ``delta_j``
        plot     -- when truthy, the fitted estimator is plotted to
                    ``test.png`` and nothing is printed; otherwise a
                    ``RESULT`` line is printed

    Returns ``None`` in both cases.
    """
    dist = dist_from_code(dist_code)

    # Look the options up in a fixed order so the first missing key is the
    # one reported.
    k, j0, delta_j, plot = (
        kwargs['k'],
        kwargs['j0'],
        kwargs['delta_j'],
        kwargs['plot'],
    )

    data = read_data(sample_name(dist_code, num_obvs, sample_no))
    assert data.shape[0] == num_obvs

    started = datetime.now()
    wave_spec = (
        (wave_name, j0),
        (wave_name, j0),
    )
    wde = WaveletDensityEstimator(wave_spec, k=k, delta_j=delta_j)
    wde.fit(data)

    if plot:
        # Plotting mode: no timing or distance is reported.
        import plotlib
        plotlib.do_plot_wde(wde, 'test.png', dist, plot)
        return

    # Elapsed time covers construction and fitting of the estimator only.
    elapsed = (datetime.now() - started).total_seconds()
    # hellinger_distance returns a pair; only the first item is reported.
    hd, _ = hellinger_distance(dist, wde)
    params = wde.pdf.nparams
    print(
        'RESULT', dist_code, num_obvs, sample_no, wave_name, k, delta_j,
        params, None, None, None, hd, elapsed,
    )
def _kde(sample_no, data):
    """
    Build a ``KDEMultivariate`` on ``data`` and return one result row.

    ``dist``, ``dist_code`` and ``num_obvs`` are not parameters; they are
    taken from the surrounding scope.

    The returned 14-tuple is
    ``(dist_code, num_obvs, sample_no, 'KDE', '', '', '', 0, 0, 0,
    num_obvs, 0.0, hd, elapsed)``, where ``elapsed`` is the number of seconds
    spent constructing the estimator and ``hd`` is the first item returned by
    ``hellinger_distance(dist, kde)``.
    """
    started = datetime.now()
    # One 'c' per column of the data.
    # NOTE(review): presumably statsmodels' "continuous" variable type - confirm.
    var_types = 'c' * data.shape[1]
    kde = KDEMultivariate(data, var_types, bw='cv_ml')
    elapsed = (datetime.now() - started).total_seconds()

    # Second item of the pair is not used.
    hd, _ = hellinger_distance(dist, kde)

    return (
        dist_code, num_obvs, sample_no,
        'KDE', '', '', '',
        0, 0, 0,
        num_obvs, 0.0,
        hd, elapsed,
    )
def gen():
    """
    Yield WDE result rows for every (sample, k, delta_j) combination.

    For each combination from
    ``itt.product(sample_range, k_range, delta_j_range)`` the sample is
    loaded, a ``WaveletDensityEstimator`` is fitted and one
    ``result_wde_classic`` row is yielded. The same estimator is then re-fitted
    with ``cvfit`` for each ``(loss, ordering, is_single)`` given by
    ``WaveletDensityEstimator.valid_options(single)``, yielding one
    ``result_wde_cv`` row per option.

    All names other than the loop variables (``dist``, ``dist_code``,
    ``num_obvs``, ``wave_name``, ``j0``, ``single`` and the ranges) come from
    the surrounding scope.
    """
    combos = itt.product(sample_range, k_range, delta_j_range)
    for ix, k, delta_j in combos:
        data = read_data(sample_name(dist_code, num_obvs, ix))
        assert data.shape[0] == num_obvs

        # Plain fit: timed from construction through fit().
        started = datetime.now()
        wave_spec = (
            (wave_name, j0),
            (wave_name, j0),
        )
        wde = WaveletDensityEstimator(wave_spec, k=k, delta_j=delta_j)
        wde.fit(data)
        elapsed = (datetime.now() - started).total_seconds()
        hd, _ = hellinger_distance(dist, wde)
        params = wde.pdf.nparams
        yield result_wde_classic(
            dist_code, num_obvs, ix, wave_name, k, delta_j, params, hd, elapsed
        )

        # Cross-validated fits reuse the estimator and data from above; each
        # one is timed on its own.
        options = WaveletDensityEstimator.valid_options(single)
        for loss, ordering, is_single in options:
            started = datetime.now()
            wde.cvfit(data, loss, ordering, is_single=is_single)
            elapsed = (datetime.now() - started).total_seconds()
            hd, _ = hellinger_distance(dist, wde)
            params = wde.pdf.nparams
            yield result_wde_cv(
                dist_code, num_obvs, ix, wave_name, k, delta_j, params,
                loss, ordering, is_single, hd, elapsed
            )
def gen():
    """
    Yield one KDE result row per sample index in ``sample_range``.

    Each sample is loaded with ``read_data(sample_name(...))`` and must have
    ``num_obvs`` rows. A ``KDEMultivariate`` is built with ``bw='cv_ml'``, its
    construction is timed, and a ``result_kde`` row carrying the distance and
    elapsed seconds is yielded.

    ``dist``, ``dist_code``, ``num_obvs`` and ``sample_range`` come from the
    surrounding scope.
    """
    for ix in sample_range:
        data = read_data(sample_name(dist_code, num_obvs, ix))
        assert data.shape[0] == num_obvs

        started = datetime.now()
        # One 'c' per column of the data.
        # NOTE(review): presumably the "continuous" variable type - confirm.
        var_types = 'c' * data.shape[1]
        kde = KDEMultivariate(data, var_types, bw='cv_ml')
        elapsed = (datetime.now() - started).total_seconds()

        # Only the first item of the returned pair is reported.
        hd, _ = hellinger_distance(dist, kde)
        yield result_kde(dist_code, num_obvs, ix, hd, elapsed)