def check_dpm(impl, data_count, beta0):
    # Builds a ComponentModel for `impl` from a random histogram of
    # `data_count` integers drawn from [0, 50), samples SAMPS values from
    # it, and scores the sample frequencies against exp(pred_prob) with
    # mgof. The value -1 is tallied as its own category (presumably the
    # "not previously observed" outcome -- confirm against the model).
    check_cm(impl)

    hist = histogram(np.random.randint(50, size=data_count))
    observed = {str(i): obs for i, obs in enumerate(hist)}
    # The mass left after beta0 is split evenly over the observed keys.
    # The division stays inside the comprehension so that an empty
    # histogram never triggers a division by zero.
    betas = {
        str(i): (1 - beta0) / len(observed)
        for i in range(len(observed))
    }
    cm = ComponentModel(
        impl,
        ss={'counts': observed},
        hp={'gamma': 1., 'alpha': 1., 'beta0': beta0, 'betas': betas})

    samples = cm.sample_data(SAMPS)

    # Empirical counts and predicted probabilities for values 0..max(samples).
    regular = [y for y in samples if y != -1]
    counts = list(histogram(regular))
    log_probs = [cm.pred_prob(x) for x in range(max(samples) + 1)]
    probs = list(np.exp(log_probs))

    # The -1 category goes last in both lists so the pairs stay aligned.
    counts.append(sum(1 for y in samples if y == -1))
    probs.append(np.exp(cm.pred_prob(-1)))

    # The scored categories must account for (almost) all of the mass.
    assert_less(1 - sum(probs), THRESH)

    # Keep only the TOPN most probable categories for the mgof call.
    ranked = sorted(zip(probs, counts), reverse=True)
    probs, counts = zip(*ranked[:TOPN])
    p = mgof(probs, counts, SAMPS, truncated=True)
    assert_greater(p, THRESH)
def check_probs(a, b):
    # Checks that two implementations report the same data_prob and
    # pred_prob when given identical hyperparameters and fed the same
    # data points one at a time.
    check_cm(a)
    check_cm(b)

    model_a = ComponentModel(a)
    model_a.realize_hp()
    # The second model reuses the hyperparameters realized by the first.
    model_b = ComponentModel(b, hp=model_a.dump_hp())

    # All data points are drawn up front, before any data is added.
    points = [model_a.sample_data() for _ in range(DPS)]
    for point in points:
        assert_almost_equal(model_a.data_prob(), model_b.data_prob())
        assert_almost_equal(model_a.pred_prob(point), model_b.pred_prob(point))
        model_a.add_data(point)
        model_b.add_data(point)
def test_crp_equals_pyp():
    # For several alpha values, checks that a 'PYP' model with d == 0
    # gives the same pred_prob as a 'CRP' model at every step of a
    # 1000-point sequence sampled from the CRP model, and the same
    # data_prob once all points have been added.
    n = 1000
    for alpha in (1., 5., 10.):
        crp = ComponentModel('CRP', hp={'alpha': alpha})
        pyp = ComponentModel('PYP', hp={'alpha': alpha, 'd': 0.})
        for _ in range(n):
            draw = crp.sample_data()
            assert_almost_equal(crp.pred_prob(draw), pyp.pred_prob(draw))
            crp.add_data(draw)
            pyp.add_data(draw)
        assert_almost_equal(crp.data_prob(), pyp.data_prob())
def check_dpm(impl, data_count, beta0):
    # Seeds a ComponentModel for `impl` with random histogram counts,
    # draws SAMPS samples, and compares the observed frequencies with
    # exp(pred_prob) through mgof. Samples equal to -1 are counted as a
    # separate category (NOTE(review): looks like the "new value"
    # outcome -- confirm against the model implementation).
    check_cm(impl)

    raw_counts = histogram(np.random.randint(50, size=data_count))
    data = {str(idx): obs for idx, obs in enumerate(raw_counts)}
    # Every key receives an equal share of the mass not taken by beta0.
    # This is evaluated per key, so an empty `data` performs no division.
    betas = {str(idx): (1 - beta0) / len(data) for idx in range(len(data))}

    hp = {
        'gamma': 1.,
        'alpha': 1.,
        'beta0': beta0,
        'betas': betas,
    }
    ss = {'counts': data}
    cm = ComponentModel(impl, ss=ss, hp=hp)
    samples = cm.sample_data(SAMPS)

    # Categories 0..max(samples): empirical counts and predicted mass.
    counts = list(histogram([s for s in samples if s != -1]))
    scores = [cm.pred_prob(value) for value in range(max(samples) + 1)]
    probs = list(np.exp(scores))

    # The -1 category is appended to both lists at the same position.
    counts.append(len([s for s in samples if s == -1]))
    probs.append(np.exp(cm.pred_prob(-1)))

    # Nearly all probability mass must be covered by the categories.
    assert_less(1 - sum(probs), THRESH)

    # Only the TOPN most probable categories are passed on to mgof.
    pairs = sorted(zip(probs, counts), reverse=True)
    top_pairs = pairs[:TOPN]
    probs, counts = zip(*top_pairs)
    p = mgof(probs, counts, SAMPS, truncated=True)
    assert_greater(p, THRESH)
def check_sums(name):
    # Checks that the pred_prob values of sequentially added data sum to
    # data_prob() of the final model. COUNT values are sampled first;
    # each one is scored before it is added.
    check_cm(name)
    cm = ComponentModel(name)
    cm.realize_hp()
    values = [cm.sample_data() for _ in range(COUNT)]

    def score_then_add(value):
        # The score must be taken before the value enters the model.
        logp = cm.pred_prob(value)
        cm.add_data(value)
        return logp

    # Starting from 0. keeps the same left-to-right float accumulation
    # as an explicit running total.
    total = sum((score_then_add(value) for value in values), 0.)
    assert_almost_equal(total, cm.data_prob())
def check_nich(impl, data_count, mean, std):
    # Samples SAMPS values from a ComponentModel for `impl` (optionally
    # seeded with sufficient statistics of `data_count` normal draws),
    # bins them, and compares bin counts against the integrated
    # predictive density using mgof.
    check_cm(impl)

    if data_count:
        data = np.random.normal(mean, std, size=data_count)
        ss = {
            'count': data_count,
            'mean': data.mean(),
            'variance': data.var(),
        }
    else:
        # No prior observations: the model is built without statistics.
        ss = None

    cm = ComponentModel(impl, ss=ss)
    samples = cm.sample_data(SAMPS)
    counts, bin_ranges = bin_samples(samples)

    def pdf(x):
        return np.exp(cm.pred_prob(x))

    # Use of quadrature is unfortunate, but for now it is the easiest
    # way to score bins and seems to work.
    probs = []
    for lower, upper in bin_ranges:
        result = quad(pdf, lower, upper, epsabs=0., epsrel=1e-6)
        probs.append(result[0])

    # The bins must cover (almost) all of the predictive mass.
    assert_less(1 - sum(probs), THRESH)

    # Keep only the TOPN most probable bins for the mgof call.
    ranked = sorted(zip(probs, counts), reverse=True)
    probs, counts = zip(*ranked[:TOPN])
    p = mgof(probs, counts, SAMPS, truncated=True)
    assert_greater(p, THRESH)
def check_nich(impl, data_count, mean, std):
    # Draws SAMPS samples from a ComponentModel for `impl`, bins them
    # with bin_samples, integrates exp(pred_prob) over each bin, and
    # passes the resulting probabilities and counts to mgof. When
    # `data_count` is non-zero the model starts from the count, mean
    # and variance of that many normal(mean, std) draws.
    check_cm(impl)

    ss = None
    if data_count:
        draws = np.random.normal(mean, std, size=data_count)
        ss = dict(
            count=data_count,
            mean=draws.mean(),
            variance=draws.var())

    cm = ComponentModel(impl, ss=ss)
    samples = cm.sample_data(SAMPS)
    counts, bin_ranges = bin_samples(samples)

    def density(x):
        return np.exp(cm.pred_prob(x))

    def bin_mass(bounds):
        # Use of quadrature is unfortunate, but for now it is the
        # easiest way to score bins and seems to work.
        low, high = bounds
        return quad(density, low, high, epsabs=0., epsrel=1e-6)[0]

    probs = [bin_mass(bounds) for bounds in bin_ranges]

    # Almost all of the predictive mass must fall inside the bins.
    assert_less(1 - sum(probs), THRESH)

    # Only the TOPN most probable bins are passed on to mgof.
    best = sorted(zip(probs, counts), reverse=True)[:TOPN]
    probs, counts = zip(*best)
    p = mgof(probs, counts, SAMPS, truncated=True)
    assert_greater(p, THRESH)
def add_remove_add(name, raw_hps, raw_ss0=None):
    '''
    This tests add_data, remove_data, pred_prob, data_prob
    '''
    n_points = 20

    def run_one(raw_hp):
        # One add / remove / re-add cycle for a single hyperparameter set.
        cm = ComponentModel(name, hp=raw_hp, ss=raw_ss0)
        cm.realize_hp()

        # Add sampled points one by one, summing their predictive scores.
        data = []
        score = 0
        for _ in range(n_points):
            dp = cm.sample_data()
            data.append(dp)
            score += cm.pred_prob(dp)
            cm.add_data(dp)

        # Snapshot of the statistics with every point present.
        cm_all = ComponentModel(name, ss=cm.dump_ss())
        assert_close(
            score,
            cm.data_prob(),
            err_msg='p(x1,...,xn) != p(x1) p(x2|x1) p(xn|...)')

        # Removing the points in a shuffled order must restore the
        # initial statistics.
        random.shuffle(data)
        for dp in data:
            cm.remove_data(dp)
        cm0 = ComponentModel(name, ss=raw_ss0)
        assert_close(cm.ss, cm0.ss, err_msg='ss + data - data != ss')

        # Re-adding them in another shuffled order must reproduce the
        # full statistics.
        random.shuffle(data)
        for dp in data:
            cm.add_data(dp)
        assert_close(cm.ss, cm_all.ss, err_msg='ss - data + data != ss')

    for raw_hp in raw_hps:
        run_one(raw_hp)
def add_remove_add(name, raw_hps, raw_ss0=None):
    '''
    This tests add_data, remove_data, pred_prob, data_prob
    '''
    sample_size = 20
    for raw_hp in raw_hps:
        model = ComponentModel(name, hp=raw_hp, ss=raw_ss0)
        model.realize_hp()

        # Phase 1: add sampled points sequentially, scoring each one
        # before it is added.
        points = []
        running_score = 0
        for _ in range(sample_size):
            point = model.sample_data()
            points.append(point)
            running_score += model.pred_prob(point)
            model.add_data(point)

        # Reference model built from the statistics with all points added.
        full_model = ComponentModel(name, ss=model.dump_ss())
        assert_close(
            running_score,
            model.data_prob(),
            err_msg='p(x1,...,xn) != p(x1) p(x2|x1) p(xn|...)')

        # Phase 2: remove every point, in random order, and compare with
        # a model built from the initial statistics only.
        random.shuffle(points)
        for point in points:
            model.remove_data(point)
        empty_model = ComponentModel(name, ss=raw_ss0)
        assert_close(
            model.ss,
            empty_model.ss,
            err_msg='ss + data - data != ss')

        # Phase 3: add everything back, again in random order, and
        # compare with the reference model.
        random.shuffle(points)
        for point in points:
            model.add_data(point)
        assert_close(
            model.ss,
            full_model.ss,
            err_msg='ss - data + data != ss')