コード例 #1
0
def pos_seq(rec, infos):
    """
    1. evaluate posterior probability on different data sizes

    run: serial
    """
    seq = []
    seen = set()

    for e in rec:
        for h in e[1]:
            if h in seen: continue
            seen.add(h)

    for e in infos:
        prob_dict = {}
        language = AnBn(max_length=e[1])
        eval_data = language.sample_data_as_FuncData(e[0])

        for h in seen:
            prob_dict[h] = h.compute_posterior(eval_data)

        seq.append(prob_dict)
        print e, 'done'
        fff()

    print '=' * 50
    return seq
コード例 #2
0
ファイル: parse_hypothesis.py プロジェクト: joshrule/LOTlib
def pos_seq(rec, infos):
    """
    1. evaluate posterior probability on different data sizes

    run: serial
    """
    seq = []
    seen = set()

    for e in rec:
        for h in e[1]:
            if h in seen: continue
            seen.add(h)

    for e in infos:
        prob_dict = {}
        language = AnBn(max_length=e[1])
        eval_data = language.sample_data_as_FuncData(e[0])

        for h in seen: prob_dict[h] = h.compute_posterior(eval_data)

        seq.append(prob_dict)
        print e, 'done'; fff()

    print '='*50
    return seq
コード例 #3
0
ファイル: parse_hypothesis.py プロジェクト: joshrule/LOTlib
def test_sto():
    """
    objective: test if our posterior distribution is stable for each time of estimation

    run: mpiexec -n 12
    """
    rec = load_hypo('out/simulations/staged/', ['staged', 'normal0', 'normal1'])
    # rec = load_hypo('out/simulations/staged/', ['staged'])

    seen = set()
    for e in rec:
        for h in e[1]:
            if h in seen: continue
            seen.add(h)
    print rank, 'hypo len: ', len(seen); fff()

    seq = []
    inner_kl_seq = []
    infos = [[i, 4*((i-1)/48+1)] for i in xrange(12, 145, 2)]

    for e in infos:
        prob_dict = {}
        language = AnBn(max_length=e[1])
        eval_data = language.sample_data_as_FuncData(e[0])

        for h in seen:prob_dict[h] = h.compute_posterior(eval_data)

        seq.append(prob_dict)
        if len(seq) > 1: inner_kl_seq.append(compute_kl(seq[-2], seq[-1]))
        print 'rank: ', rank, e, 'done'; fff()

    dump(seq, open('seq_'+str(rank)+suffix,'w'))
    dump(inner_kl_seq, open('inner_kl_seq_'+str(rank)+suffix, 'w'))

    if rank != 0:
        comm.send(seq, dest=0)
        print rank, 'send'; fff()
        sys.exit(0)
    else:
        seq_set = [seq]
        for i_s in xrange(size - 1):
            seq_set.append(comm.recv(source=i_s+1))
            print rank, 'recv:', i_s; fff()

        cross_kl_seq = []
        for i_s in xrange(len(seq_set[0])):
            tmp = []

            for i_ss in xrange(len(seq_set)-1):
                current_dict = seq_set[i_ss][i_s]
                next_dict = seq[i_ss+1][i_s]
                tmp.append(compute_kl(current_dict, next_dict))
                print 'row %i column %i done' % (i_ss, i_s); fff()

            cross_kl_seq.append(tmp)

        dump(cross_kl_seq, open('cross_kl_seq_'+str(rank)+suffix, 'w'))
        for e in cross_kl_seq:
            print e; fff()
コード例 #4
0
def test_sto():
    """
    objective: test if our posterior distribution is stable for each time of estimation

    run: mpiexec -n 12
    """
    rec = load_hypo('out/simulations/staged/',
                    ['staged', 'normal0', 'normal1'])
    # rec = load_hypo('out/simulations/staged/', ['staged'])

    seen = set()
    for e in rec:
        for h in e[1]:
            if h in seen: continue
            seen.add(h)
    print rank, 'hypo len: ', len(seen)
    fff()

    seq = []
    inner_kl_seq = []
    infos = [[i, 4 * ((i - 1) / 48 + 1)] for i in xrange(12, 145, 2)]

    for e in infos:
        prob_dict = {}
        language = AnBn(max_length=e[1])
        eval_data = language.sample_data_as_FuncData(e[0])

        for h in seen:
            prob_dict[h] = h.compute_posterior(eval_data)

        seq.append(prob_dict)
        if len(seq) > 1: inner_kl_seq.append(compute_kl(seq[-2], seq[-1]))
        print 'rank: ', rank, e, 'done'
        fff()

    dump(seq, open('seq_' + str(rank) + suffix, 'w'))
    dump(inner_kl_seq, open('inner_kl_seq_' + str(rank) + suffix, 'w'))

    if rank != 0:
        comm.send(seq, dest=0)
        print rank, 'send'
        fff()
        sys.exit(0)
    else:
        seq_set = [seq]
        for i_s in xrange(size - 1):
            seq_set.append(comm.recv(source=i_s + 1))
            print rank, 'recv:', i_s
            fff()

        cross_kl_seq = []
        for i_s in xrange(len(seq_set[0])):
            tmp = []

            for i_ss in xrange(len(seq_set) - 1):
                current_dict = seq_set[i_ss][i_s]
                next_dict = seq[i_ss + 1][i_s]
                tmp.append(compute_kl(current_dict, next_dict))
                print 'row %i column %i done' % (i_ss, i_s)
                fff()

            cross_kl_seq.append(tmp)

        dump(cross_kl_seq, open('cross_kl_seq_' + str(rank) + suffix, 'w'))
        for e in cross_kl_seq:
            print e
            fff()
コード例 #5
0
def make_staged_seq2(jump, temp):
    """
    run: mpiexec -n 12
    """
    rec = load_hypo('out/simulations/staged/',
                    ['staged', 'normal0', 'normal1'])
    seen = set()
    work_list = slice_list(range(size), 3)

    for e in rec:
        for h in e[1]:
            if h in seen: continue
            seen.add(h)

    if rank in work_list[0]:
        seq = []
        infos = [[i, min(4 * ((int(i) - 1) / 48 + 1), 12)]
                 for i in [10**e for e in np.arange(0, 2.2, 0.1)]]

        for e in infos:
            prob_dict = {}
            language = AnBn(max_length=e[1] + (e[1] % 2 != 0))
            eval_data = language.sample_data_as_FuncData(e[0])

            for h in seen:
                h.likelihood_temperature = temp
                prob_dict[h] = h.compute_posterior(eval_data)

            seq.append(prob_dict)
            print 'rank: ', rank, e, 'done'
            fff()

    elif rank in work_list[1]:
        seq = []
        infos = [[i, 12] for i in [10**e for e in np.arange(0, 2.2, 0.1)]]

        for e in infos:
            prob_dict = {}
            language = AnBn(max_length=e[1])
            eval_data = language.sample_data_as_FuncData(e[0])

            for h in seen:
                h.likelihood_temperature = temp
                prob_dict[h] = h.compute_posterior(eval_data)

            seq.append(prob_dict)
            print 'rank: ', rank, e, 'done'
            fff()

    else:
        seq = []
        infos = [[i, 12] for i in [10**e for e in np.arange(0, 2.2, 0.1)]]

        for e in infos:
            prob_dict = {}
            eval_data = uniform_data(e[0], e[1])

            for h in seen:
                h.likelihood_temperature = temp
                prob_dict[h] = h.compute_posterior(eval_data)

            seq.append(prob_dict)
            print 'rank: ', rank, e, 'done'
            fff()

    # TODO no need ?
    from copy import deepcopy
    dict_0 = deepcopy(seq[0])
    for h in dict_0:
        dict_0[h] = h.compute_posterior(
            [FunctionData(input=[], output=Counter())])
    seq.insert(0, dict_0)
    dump(seq, open('seq' + str(rank) + suffix, 'w'))
コード例 #6
0
ファイル: parse_hypothesis.py プロジェクト: joshrule/LOTlib
def make_staged_seq2(jump, temp):
    """
    run: mpiexec -n 12
    """
    rec = load_hypo('out/simulations/staged/', ['staged', 'normal0', 'normal1'])
    seen = set()
    work_list = slice_list(range(size), 3)

    for e in rec:
        for h in e[1]:
            if h in seen: continue
            seen.add(h)

    if rank in work_list[0]:
        seq = []
        infos = [[i, min(4*((int(i)-1)/48+1), 12)] for i in [10**e for e in np.arange(0, 2.2, 0.1)]]

        for e in infos:
            prob_dict = {}
            language = AnBn(max_length=e[1]+(e[1]%2!=0))
            eval_data = language.sample_data_as_FuncData(e[0])

            for h in seen:
                h.likelihood_temperature = temp
                prob_dict[h] = h.compute_posterior(eval_data)

            seq.append(prob_dict)
            print 'rank: ', rank, e, 'done'; fff()

    elif rank in work_list[1]:
        seq = []
        infos = [[i, 12] for i in [10**e for e in np.arange(0, 2.2, 0.1)]]

        for e in infos:
            prob_dict = {}
            language = AnBn(max_length=e[1])
            eval_data = language.sample_data_as_FuncData(e[0])

            for h in seen:
                h.likelihood_temperature = temp
                prob_dict[h] = h.compute_posterior(eval_data)

            seq.append(prob_dict)
            print 'rank: ', rank, e, 'done'; fff()

    else:
        seq = []
        infos = [[i, 12] for i in [10**e for e in np.arange(0, 2.2, 0.1)]]

        for e in infos:
            prob_dict = {}
            eval_data = uniform_data(e[0], e[1])

            for h in seen:
                h.likelihood_temperature = temp
                prob_dict[h] = h.compute_posterior(eval_data)

            seq.append(prob_dict)
            print 'rank: ', rank, e, 'done'; fff()

    # TODO no need ?
    from copy import deepcopy
    dict_0 = deepcopy(seq[0])
    for h in dict_0:
        dict_0[h] = h.compute_posterior([FunctionData(input=[], output=Counter())])
    seq.insert(0, dict_0)
    dump(seq, open('seq'+str(rank)+suffix, 'w'))
コード例 #7
0
ファイル: SkewedFrequency.py プロジェクト: flrgsr/LOTlib
In this case, we investigate the effect of different observed data distributions on training convergence.
"""

if __name__ == '__main__':
    # MPI setup; the rank is used below to give each process its own output files
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    # ========================================================================================================
    # Process command line arguments
    # ========================================================================================================
    # NOTE(review): `parser` is defined outside this view; the options read
    # here are NAME and FINITE
    (options, args) = parser.parse_args()

    # output names: <prefix><case name><rank>_<NAME>_<month day>_<hour minute second>
    suffix = time.strftime('_' + options.NAME + '_%m%d_%H%M%S', time.localtime())
    prefix = '../out/simulations/skewed/'

    # ========================================================================================================
    # Running
    # ========================================================================================================
    language = AnBn()

    # first run: no explicit data is passed to probe_MHsampler
    show_info('running skewed input case..')
    rec = probe_MHsampler(make_hypothesis('AnBn'), language, options, prefix + 'skewed_out_' + str(rank) + suffix)

    show_info('running normal input case..')
    # NOTE(review): CASE is defined outside this view; presumably a counter
    # identifying the current case -- confirm
    CASE += 1
    # second run: every string a^i b^i for i = 1 .. FINITE/2 gets the same
    # weight `num` (the weights sum to 64 when FINITE is even)
    cnt = Counter()
    num = 64.0 * 2 / options.FINITE
    for i in xrange(1, options.FINITE/2+1):
        cnt['a'*i+'b'*i] = num

    # the hand-built counts are passed explicitly as the data
    rec1 = probe_MHsampler(make_hypothesis('AnBn'), language, options, prefix + 'normal_out' + str(rank) + suffix, data=[FunctionData(input=[], output=cnt)])