def kde_fit_and_score(Y, Yhs, n=None):
    """
    Score a kde fit to each entry of Yhs against a kde fit to Y.

    Y is [n x p]
    Yhs is list of [n x p]
    n is the number of evaluation points; defaults to 50 * Y.shape[0]

    A kde is fit separately to Y and to every Yh with pykde.kde_fit_cv.
    Evaluation points are drawn from the kde fit to Y, and the value of
    score_all(kde for Y, kdes for Yhs, evaluation points) is returned.

    NOTE(review): a second definition with this same name follows in this
    file and rebinds the name when the module is loaded.
    """
    if n is None:
        n = 50 * Y.shape[0]

    # kde_fit_cv hands back a pair; only element 0 (the kde) is used here.
    # Each Yh gets its own kde_fit_cv call rather than reusing the second
    # element returned for Y.
    reference_kde, _ = pykde.kde_fit_cv(Y)
    cv_fits = [pykde.kde_fit_cv(Yh) for Yh in Yhs]
    candidate_kdes = [fit[0] for fit in cv_fits]

    # Evaluation points are a random sample from the reference kde.
    # Alternatives considered previously: a uniform random grid within the
    # bounds of all points, or evaluating directly on Y.
    eval_points = reference_kde.sample(n)
    return score_all(reference_kde, candidate_kdes, eval_points)
def kde_fit_and_score(Y, Yhs, n=None):
    """
    Fit kdes for Y and for each Yh, then score the Yh kdes against Y's kde.

    Y is [n x p]
    Yhs is list of [n x p]
    n: how many points to sample for scoring (50 * Y.shape[0] when None)

    returns the result of score_all, called with the kde fit on Y, the list
    of kdes fit on Yhs, and n points sampled from the kde fit on Y
    """
    sample_count = 50 * Y.shape[0] if n is None else n

    # fit kdes -- every data set is fit independently through kde_fit_cv;
    # the second value returned for Y is intentionally not reused for Yhs
    target_kde, _bandwidth = pykde.kde_fit_cv(Y)
    fitted = [pykde.kde_fit_cv(Yh) for Yh in Yhs]
    model_kdes = [result[0] for result in fitted]

    # scoring happens on a random sample drawn from the kde of Y (other
    # choices tried before: a uniform grid over all points, or Y itself)
    sample_points = target_kde.sample(sample_count)
    return score_all(target_kde, model_kdes, sample_points)
def eval_all(matfile, outfile=None):
    """
    Fit a kde to Y, then score each Yh by the summed log of its kde values.

    matfile: path handed to load_Ys, which returns (Y, Yhs)
    outfile: passed through unchanged to write_scores (may be None)

    Side effects:
      * the kde fit on Y is pickled next to matfile, with a trailing
        '.mat' replaced by '.pickle' ('.pickle' is appended when the path
        does not end in '.mat', so the input file is never overwritten)
      * write_scores(scores, outfile) is called before returning

    returns scores: a flat list when Yhs.shape has a single dimension,
    otherwise a list of lists (one inner list per Yh). Each score is
    np.sum(np.log(pykde.kde_eval(kde_base, y))).
    """
    Y, Yhs = load_Ys(matfile)
    kde_base, _ = pykde.kde_fit_cv(Y)

    # Only swap a trailing '.mat'. A blanket str.replace would also rewrite
    # '.mat' inside directory names, and would return the input path itself
    # when there is no '.mat' at all.
    suffix = '.mat'
    stem = matfile[:-len(suffix)] if matfile.endswith(suffix) else matfile
    fnm = stem + '.pickle'

    # Context manager guarantees the handle is flushed and closed.
    with open(fnm, "wb") as fh:
        pickle.dump(kde_base, fh)

    def scorefcn(ps):
        # presumably ps are densities from the kde, making this a
        # log-likelihood -- confirm against pykde.kde_eval
        return np.sum(np.log(ps))

    if len(Yhs.shape) == 1:
        scores = [scorefcn(pykde.kde_eval(kde_base, y)) for y in Yhs]
    else:
        scores = [
            [scorefcn(pykde.kde_eval(kde_base, y)) for y in Yh]
            for Yh in Yhs
        ]

    write_scores(scores, outfile)
    return scores