def make_staged_seq2(jump, temp): """ run: mpiexec -n 12 """ rec = load_hypo('out/simulations/staged/', ['staged', 'normal0', 'normal1']) seen = set() work_list = slice_list(range(size), 3) for e in rec: for h in e[1]: if h in seen: continue seen.add(h) if rank in work_list[0]: seq = [] infos = [[i, min(4 * ((int(i) - 1) / 48 + 1), 12)] for i in [10**e for e in np.arange(0, 2.2, 0.1)]] for e in infos: prob_dict = {} language = AnBn(max_length=e[1] + (e[1] % 2 != 0)) eval_data = language.sample_data_as_FuncData(e[0]) for h in seen: h.likelihood_temperature = temp prob_dict[h] = h.compute_posterior(eval_data) seq.append(prob_dict) print 'rank: ', rank, e, 'done' fff() elif rank in work_list[1]: seq = [] infos = [[i, 12] for i in [10**e for e in np.arange(0, 2.2, 0.1)]] for e in infos: prob_dict = {} language = AnBn(max_length=e[1]) eval_data = language.sample_data_as_FuncData(e[0]) for h in seen: h.likelihood_temperature = temp prob_dict[h] = h.compute_posterior(eval_data) seq.append(prob_dict) print 'rank: ', rank, e, 'done' fff() else: seq = [] infos = [[i, 12] for i in [10**e for e in np.arange(0, 2.2, 0.1)]] for e in infos: prob_dict = {} eval_data = uniform_data(e[0], e[1]) for h in seen: h.likelihood_temperature = temp prob_dict[h] = h.compute_posterior(eval_data) seq.append(prob_dict) print 'rank: ', rank, e, 'done' fff() # TODO no need ? from copy import deepcopy dict_0 = deepcopy(seq[0]) for h in dict_0: dict_0[h] = h.compute_posterior( [FunctionData(input=[], output=Counter())]) seq.insert(0, dict_0) dump(seq, open('seq' + str(rank) + suffix, 'w'))
def make_staged_seq2(jump, temp):
    """
    run: mpiexec -n 12

    Builds, on each MPI rank, a sequence of {hypothesis: posterior-score}
    dicts over geometrically increasing amounts of evaluation data, prepends
    a zero-data posterior dict, and dumps the sequence to 'seq<rank><suffix>'.

    NOTE(review): this is a byte-for-byte duplicate re-definition — an
    identical make_staged_seq2 appears earlier in this file and is shadowed
    by this one; confirm which definition is intended and delete the other.
    `jump` is unused in this body.
    """
    rec = load_hypo('out/simulations/staged/', ['staged', 'normal0', 'normal1'])
    seen = set()
    # presumably `rank`/`size` are the MPI rank and world size -- TODO confirm;
    # the 12 ranks are split into three equal groups (staged/normal/uniform).
    work_list = slice_list(range(size), 3)

    # collect the distinct hypotheses from every record (e[1] holds a batch)
    for e in rec:
        for h in e[1]:
            if h in seen: continue
            seen.add(h)

    if rank in work_list[0]:
        # staged condition: AnBn max_length grows with the amount of data
        # (4 per 48 items, capped at 12) and is rounded up to an even value
        seq = []
        infos = [[i, min(4*((int(i)-1)/48+1), 12)] for i in [10**e for e in np.arange(0, 2.2, 0.1)]]
        for e in infos:
            prob_dict = {}
            language = AnBn(max_length=e[1]+(e[1]%2!=0))
            eval_data = language.sample_data_as_FuncData(e[0])
            for h in seen:
                h.likelihood_temperature = temp
                prob_dict[h] = h.compute_posterior(eval_data)
            seq.append(prob_dict)
            print 'rank: ', rank, e, 'done'; fff()
    elif rank in work_list[1]:
        # normal condition: fixed max_length of 12
        seq = []
        infos = [[i, 12] for i in [10**e for e in np.arange(0, 2.2, 0.1)]]
        for e in infos:
            prob_dict = {}
            language = AnBn(max_length=e[1])
            eval_data = language.sample_data_as_FuncData(e[0])
            for h in seen:
                h.likelihood_temperature = temp
                prob_dict[h] = h.compute_posterior(eval_data)
            seq.append(prob_dict)
            print 'rank: ', rank, e, 'done'; fff()
    else:
        # uniform condition: evaluation data drawn by uniform_data
        seq = []
        infos = [[i, 12] for i in [10**e for e in np.arange(0, 2.2, 0.1)]]
        for e in infos:
            prob_dict = {}
            eval_data = uniform_data(e[0], e[1])
            for h in seen:
                h.likelihood_temperature = temp
                prob_dict[h] = h.compute_posterior(eval_data)
            seq.append(prob_dict)
            print 'rank: ', rank, e, 'done'; fff()

    # TODO no need ?
    # prepend the posterior computed on zero data
    from copy import deepcopy
    dict_0 = deepcopy(seq[0])
    for h in dict_0:
        dict_0[h] = h.compute_posterior([FunctionData(input=[], output=Counter())])
    seq.insert(0, dict_0)

    # NOTE(review): file handle is never closed here
    dump(seq, open('seq'+str(rank)+suffix, 'w'))
def make_staged_posterior_seq(_dir, temperature, lang_name, dtype): """ script: python parse_hypothesis.py --mode=make_staged_posterior_seq --file=file --temp=1 --language=AnBn --dtype=staged/uniform 1. read raw file 2. compute fixed Counter 3. compute posterior for different amounts dumped posterior format: [topn, [z,amount,finite,[s1,s2,....]], [], [], ....] NOTE: if _dir is previously dumped posterior seq, then we use it """ if not (os.path.isfile(_dir) and 'posterior_seq' in _dir): topn = set() for filename in os.listdir(_dir): if ('staged' in filename or 'normal' in filename) and 'seq' not in filename: print 'load', filename _set = load(open(_dir + filename)) topn.update([h for h in _set]) topn = list(topn) # fix the llcnts to save time and make curve smooth print 'get llcnts...' for h in topn: llcnts = Counter([h() for _ in xrange(2048)]) h.fixed_ll_counts = llcnts seq = [] seq.append(topn) for amount, finite in mk_staged_wlist(0, 200, 2, [48, 96]): print 'posterior on', amount, finite if dtype == 'staged': language = instance(lang_name, finite) eval_data = language.sample_data_as_FuncData(amount) elif dtype == 'uniform': eval_data = uniform_data(amount, 12) for h in topn: h.likelihood_temperature = temperature h.compute_posterior(eval_data) Z = logsumexp([h.posterior_score for h in topn]) seq.append([Z, amount, finite, [h.posterior_score for h in topn]]) dump(seq, open(dtype + '_posterior_seq' + suffix, 'w')) else: seq = load(open(_dir)) # ====================== compute KL based on seq ======================= print 'compute kl seq...' kl_seq = [] topn = seq.pop(0) for i in xrange(len(seq) - 1): kl_seq.append([seq[i][1], compute_kl2(seq[i], seq[i + 1])]) dump(kl_seq, open(dtype + '_kl_seq' + suffix, 'w'))
def make_staged_posterior_seq(_dir, temperature, lang_name, dtype):
    """
    script: python parse_hypothesis.py --mode=make_staged_posterior_seq --file=file --temp=1 --language=AnBn --dtype=staged/uniform

    1. read raw file
    2. compute fixed Counter
    3. compute posterior for different amounts

    dumped posterior format: [topn, [z,amount,finite,[s1,s2,....]], [], [], ....]

    NOTE: if _dir is previously dumped posterior seq, then we use it

    NOTE(review): this is a byte-for-byte duplicate re-definition — an
    identical make_staged_posterior_seq appears earlier in this file and is
    shadowed by this one; confirm which definition is intended.
    """
    if not (os.path.isfile(_dir) and 'posterior_seq' in _dir):
        # _dir is a directory of raw hypothesis files: gather their union.
        topn = set()
        for filename in os.listdir(_dir):
            if ('staged' in filename or 'normal' in filename) and 'seq' not in filename:
                print 'load', filename
                # NOTE(review): file handle is never closed
                _set = load(open(_dir+filename))
                topn.update([h for h in _set])
        topn = list(topn)

        # fix the llcnts to save time and make curve smooth
        print 'get llcnts...'
        for h in topn:
            llcnts = Counter([h() for _ in xrange(2048)])
            h.fixed_ll_counts = llcnts

        seq = []
        seq.append(topn)

        for amount, finite in mk_staged_wlist(0,200,2,[48,96]):
            print 'posterior on', amount, finite

            # NOTE(review): an unrecognized dtype leaves eval_data unset here
            # (NameError on first pass, stale data afterwards)
            if dtype == 'staged':
                language = instance(lang_name, finite)
                eval_data = language.sample_data_as_FuncData(amount)
            elif dtype == 'uniform':
                eval_data = uniform_data(amount, 12)

            for h in topn:
                h.likelihood_temperature = temperature
                h.compute_posterior(eval_data)

            # normalizing constant over the current posterior scores
            Z = logsumexp([h.posterior_score for h in topn])
            seq.append([Z, amount, finite, [h.posterior_score for h in topn]])

        # NOTE(review): file handle is never closed
        dump(seq,open(dtype + '_posterior_seq' + suffix, 'w'))
    else:
        # _dir is a previously dumped posterior sequence: reload it.
        # NOTE(review): file handle is never closed
        seq = load(open(_dir))

    # ====================== compute KL based on seq =======================
    print 'compute kl seq...'
    kl_seq = []
    # drop the leading hypothesis list; remaining entries are score rows
    topn = seq.pop(0)

    for i in xrange(len(seq)-1):
        kl_seq.append([seq[i][1],compute_kl2(seq[i],seq[i+1])])

    # NOTE(review): file handle is never closed
    dump(kl_seq,open(dtype + '_kl_seq' + suffix,'w'))