def pos_seq(rec, infos): """ 1. evaluate posterior probability on different data sizes run: serial """ seq = [] seen = set() for e in rec: for h in e[1]: if h in seen: continue seen.add(h) for e in infos: prob_dict = {} language = AnBn(max_length=e[1]) eval_data = language.sample_data_as_FuncData(e[0]) for h in seen: prob_dict[h] = h.compute_posterior(eval_data) seq.append(prob_dict) print e, 'done' fff() print '=' * 50 return seq
def pos_seq(rec, infos): """ 1. evaluate posterior probability on different data sizes run: serial """ seq = [] seen = set() for e in rec: for h in e[1]: if h in seen: continue seen.add(h) for e in infos: prob_dict = {} language = AnBn(max_length=e[1]) eval_data = language.sample_data_as_FuncData(e[0]) for h in seen: prob_dict[h] = h.compute_posterior(eval_data) seq.append(prob_dict) print e, 'done'; fff() print '='*50 return seq
def test_sto(): """ objective: test if our posterior distribution is stable for each time of estimation run: mpiexec -n 12 """ rec = load_hypo('out/simulations/staged/', ['staged', 'normal0', 'normal1']) # rec = load_hypo('out/simulations/staged/', ['staged']) seen = set() for e in rec: for h in e[1]: if h in seen: continue seen.add(h) print rank, 'hypo len: ', len(seen); fff() seq = [] inner_kl_seq = [] infos = [[i, 4*((i-1)/48+1)] for i in xrange(12, 145, 2)] for e in infos: prob_dict = {} language = AnBn(max_length=e[1]) eval_data = language.sample_data_as_FuncData(e[0]) for h in seen:prob_dict[h] = h.compute_posterior(eval_data) seq.append(prob_dict) if len(seq) > 1: inner_kl_seq.append(compute_kl(seq[-2], seq[-1])) print 'rank: ', rank, e, 'done'; fff() dump(seq, open('seq_'+str(rank)+suffix,'w')) dump(inner_kl_seq, open('inner_kl_seq_'+str(rank)+suffix, 'w')) if rank != 0: comm.send(seq, dest=0) print rank, 'send'; fff() sys.exit(0) else: seq_set = [seq] for i_s in xrange(size - 1): seq_set.append(comm.recv(source=i_s+1)) print rank, 'recv:', i_s; fff() cross_kl_seq = [] for i_s in xrange(len(seq_set[0])): tmp = [] for i_ss in xrange(len(seq_set)-1): current_dict = seq_set[i_ss][i_s] next_dict = seq[i_ss+1][i_s] tmp.append(compute_kl(current_dict, next_dict)) print 'row %i column %i done' % (i_ss, i_s); fff() cross_kl_seq.append(tmp) dump(cross_kl_seq, open('cross_kl_seq_'+str(rank)+suffix, 'w')) for e in cross_kl_seq: print e; fff()
def test_sto(): """ objective: test if our posterior distribution is stable for each time of estimation run: mpiexec -n 12 """ rec = load_hypo('out/simulations/staged/', ['staged', 'normal0', 'normal1']) # rec = load_hypo('out/simulations/staged/', ['staged']) seen = set() for e in rec: for h in e[1]: if h in seen: continue seen.add(h) print rank, 'hypo len: ', len(seen) fff() seq = [] inner_kl_seq = [] infos = [[i, 4 * ((i - 1) / 48 + 1)] for i in xrange(12, 145, 2)] for e in infos: prob_dict = {} language = AnBn(max_length=e[1]) eval_data = language.sample_data_as_FuncData(e[0]) for h in seen: prob_dict[h] = h.compute_posterior(eval_data) seq.append(prob_dict) if len(seq) > 1: inner_kl_seq.append(compute_kl(seq[-2], seq[-1])) print 'rank: ', rank, e, 'done' fff() dump(seq, open('seq_' + str(rank) + suffix, 'w')) dump(inner_kl_seq, open('inner_kl_seq_' + str(rank) + suffix, 'w')) if rank != 0: comm.send(seq, dest=0) print rank, 'send' fff() sys.exit(0) else: seq_set = [seq] for i_s in xrange(size - 1): seq_set.append(comm.recv(source=i_s + 1)) print rank, 'recv:', i_s fff() cross_kl_seq = [] for i_s in xrange(len(seq_set[0])): tmp = [] for i_ss in xrange(len(seq_set) - 1): current_dict = seq_set[i_ss][i_s] next_dict = seq[i_ss + 1][i_s] tmp.append(compute_kl(current_dict, next_dict)) print 'row %i column %i done' % (i_ss, i_s) fff() cross_kl_seq.append(tmp) dump(cross_kl_seq, open('cross_kl_seq_' + str(rank) + suffix, 'w')) for e in cross_kl_seq: print e fff()
def make_staged_seq2(jump, temp): """ run: mpiexec -n 12 """ rec = load_hypo('out/simulations/staged/', ['staged', 'normal0', 'normal1']) seen = set() work_list = slice_list(range(size), 3) for e in rec: for h in e[1]: if h in seen: continue seen.add(h) if rank in work_list[0]: seq = [] infos = [[i, min(4 * ((int(i) - 1) / 48 + 1), 12)] for i in [10**e for e in np.arange(0, 2.2, 0.1)]] for e in infos: prob_dict = {} language = AnBn(max_length=e[1] + (e[1] % 2 != 0)) eval_data = language.sample_data_as_FuncData(e[0]) for h in seen: h.likelihood_temperature = temp prob_dict[h] = h.compute_posterior(eval_data) seq.append(prob_dict) print 'rank: ', rank, e, 'done' fff() elif rank in work_list[1]: seq = [] infos = [[i, 12] for i in [10**e for e in np.arange(0, 2.2, 0.1)]] for e in infos: prob_dict = {} language = AnBn(max_length=e[1]) eval_data = language.sample_data_as_FuncData(e[0]) for h in seen: h.likelihood_temperature = temp prob_dict[h] = h.compute_posterior(eval_data) seq.append(prob_dict) print 'rank: ', rank, e, 'done' fff() else: seq = [] infos = [[i, 12] for i in [10**e for e in np.arange(0, 2.2, 0.1)]] for e in infos: prob_dict = {} eval_data = uniform_data(e[0], e[1]) for h in seen: h.likelihood_temperature = temp prob_dict[h] = h.compute_posterior(eval_data) seq.append(prob_dict) print 'rank: ', rank, e, 'done' fff() # TODO no need ? from copy import deepcopy dict_0 = deepcopy(seq[0]) for h in dict_0: dict_0[h] = h.compute_posterior( [FunctionData(input=[], output=Counter())]) seq.insert(0, dict_0) dump(seq, open('seq' + str(rank) + suffix, 'w'))
comm = MPI.COMM_WORLD rank = comm.Get_rank() # ======================================================================================================== # Process command line arguments # ======================================================================================================== (options, args) = parser.parse_args() suffix = time.strftime('_' + options.NAME + '_%m%d_%H%M%S', time.localtime()) prefix = '../out/simulations/skewed/' # ======================================================================================================== # Running # ======================================================================================================== language = AnBn() show_info('running skewed input case..') rec = probe_MHsampler(make_hypothesis('AnBn'), language, options, prefix + 'skewed_out_' + str(rank) + suffix) show_info('running normal input case..') CASE += 1 cnt = Counter() num = 64.0 * 2 / options.FINITE for i in xrange(1, options.FINITE / 2 + 1): cnt['a' * i + 'b' * i] = num rec1 = probe_MHsampler(make_hypothesis('AnBn'), language, options,
def make_staged_seq2(jump, temp): """ run: mpiexec -n 12 """ rec = load_hypo('out/simulations/staged/', ['staged', 'normal0', 'normal1']) seen = set() work_list = slice_list(range(size), 3) for e in rec: for h in e[1]: if h in seen: continue seen.add(h) if rank in work_list[0]: seq = [] infos = [[i, min(4*((int(i)-1)/48+1), 12)] for i in [10**e for e in np.arange(0, 2.2, 0.1)]] for e in infos: prob_dict = {} language = AnBn(max_length=e[1]+(e[1]%2!=0)) eval_data = language.sample_data_as_FuncData(e[0]) for h in seen: h.likelihood_temperature = temp prob_dict[h] = h.compute_posterior(eval_data) seq.append(prob_dict) print 'rank: ', rank, e, 'done'; fff() elif rank in work_list[1]: seq = [] infos = [[i, 12] for i in [10**e for e in np.arange(0, 2.2, 0.1)]] for e in infos: prob_dict = {} language = AnBn(max_length=e[1]) eval_data = language.sample_data_as_FuncData(e[0]) for h in seen: h.likelihood_temperature = temp prob_dict[h] = h.compute_posterior(eval_data) seq.append(prob_dict) print 'rank: ', rank, e, 'done'; fff() else: seq = [] infos = [[i, 12] for i in [10**e for e in np.arange(0, 2.2, 0.1)]] for e in infos: prob_dict = {} eval_data = uniform_data(e[0], e[1]) for h in seen: h.likelihood_temperature = temp prob_dict[h] = h.compute_posterior(eval_data) seq.append(prob_dict) print 'rank: ', rank, e, 'done'; fff() # TODO no need ? from copy import deepcopy dict_0 = deepcopy(seq[0]) for h in dict_0: dict_0[h] = h.compute_posterior([FunctionData(input=[], output=Counter())]) seq.insert(0, dict_0) dump(seq, open('seq'+str(rank)+suffix, 'w'))