Example #1
def make_staged_seq2(jump, temp):
    """
    run: mpiexec -n 12
    """
    rec = load_hypo('out/simulations/staged/',
                    ['staged', 'normal0', 'normal1'])
    seen = set()
    # split the MPI ranks into three work groups
    work_list = slice_list(range(size), 3)

    # collect the unique hypotheses from all loaded records
    for e in rec:
        seen.update(e[1])

    # first work group: staged input, max string length grows with data amount
    if rank in work_list[0]:
        seq = []
        # data amounts on a log scale from 1 to ~10^2.1; max length grows by 4
        # for every 48 data points, capped at 12 (Python 2 integer division intended)
        infos = [[i, min(4 * ((int(i) - 1) / 48 + 1), 12)]
                 for i in [10**e for e in np.arange(0, 2.2, 0.1)]]

        for e in infos:
            prob_dict = {}
            # round max_length up to an even number (AnBn strings have even length)
            language = AnBn(max_length=e[1] + e[1] % 2)
            eval_data = language.sample_data_as_FuncData(e[0])

            for h in seen:
                h.likelihood_temperature = temp
                prob_dict[h] = h.compute_posterior(eval_data)

            seq.append(prob_dict)
            print 'rank: ', rank, e, 'done'
            fff()

    # second work group: AnBn data with a fixed max string length of 12
    elif rank in work_list[1]:
        seq = []
        infos = [[i, 12] for i in [10**e for e in np.arange(0, 2.2, 0.1)]]

        for e in infos:
            prob_dict = {}
            language = AnBn(max_length=e[1])
            eval_data = language.sample_data_as_FuncData(e[0])

            for h in seen:
                h.likelihood_temperature = temp
                prob_dict[h] = h.compute_posterior(eval_data)

            seq.append(prob_dict)
            print 'rank: ', rank, e, 'done'
            fff()

    # remaining ranks: uniformly distributed data instead of AnBn samples
    else:
        seq = []
        infos = [[i, 12] for i in [10**e for e in np.arange(0, 2.2, 0.1)]]

        for e in infos:
            prob_dict = {}
            eval_data = uniform_data(e[0], e[1])

            for h in seen:
                h.likelihood_temperature = temp
                prob_dict[h] = h.compute_posterior(eval_data)

            seq.append(prob_dict)
            print 'rank: ', rank, e, 'done'
            fff()

    # prepend a baseline posterior computed on zero data (TODO: possibly unnecessary)
    from copy import deepcopy
    dict_0 = deepcopy(seq[0])
    for h in dict_0:
        dict_0[h] = h.compute_posterior(
            [FunctionData(input=[], output=Counter())])
    seq.insert(0, dict_0)
    dump(seq, open('seq' + str(rank) + suffix, 'w'))
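Note: the function above relies on module-level globals defined elsewhere in parse_hypothesis.py (rank, size, suffix, fff, slice_list, plus load_hypo, dump and the numpy import). A minimal sketch of what that setup might look like, assuming mpi4py and treating slice_list and fff as simple helpers (the names and exact behavior here are assumptions, not the original implementation):

import sys
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()   # index of this MPI process
size = comm.Get_size()   # total number of processes (12 in the docstring)
suffix = '.pickle'       # assumed output-file suffix; the original may differ

def fff():
    # assumed helper: flush stdout so per-rank progress lines appear promptly
    sys.stdout.flush()

def slice_list(lst, n):
    # assumed helper: split lst into n roughly equal contiguous chunks
    lst = list(lst)
    k, m = divmod(len(lst), n)
    return [lst[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n)]

With this setup, the first third of the ranks handles the growing-length (staged) condition, the second third the fixed-length condition, and the remaining ranks the uniform-data condition; each rank dumps its own seq file.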
Example #2
def make_staged_posterior_seq(_dir, temperature, lang_name, dtype):
    """
        script: python parse_hypothesis.py --mode=make_staged_posterior_seq --file=file --temp=1 --language=AnBn --dtype=staged/uniform

        1. read raw file
        2. compute fixed Counter
        3. compute posterior for different amounts

        dumped posterior format: [topn, [z,amount,finite,[s1,s2,....]], [], [], ....]

        NOTE: if _dir is a previously dumped posterior seq, it is loaded and reused directly
    """

    # _dir is a directory of raw hypothesis files: rebuild the posterior sequence
    if not (os.path.isfile(_dir) and 'posterior_seq' in _dir):

        topn = set()

        # gather hypotheses from every staged/normal run (skip dumped seq files)
        for filename in os.listdir(_dir):
            if ('staged' in filename
                    or 'normal' in filename) and 'seq' not in filename:
                print 'load', filename
                _set = load(open(_dir + filename))
                topn.update(_set)
        topn = list(topn)

        # pre-compute each hypothesis's likelihood counts once (2048 samples)
        # to save time and keep the posterior curves smooth
        print 'get llcnts...'
        for h in topn:
            llcnts = Counter([h() for _ in xrange(2048)])
            h.fixed_ll_counts = llcnts

        seq = []
        seq.append(topn)

        for amount, finite in mk_staged_wlist(0, 200, 2, [48, 96]):

            print 'posterior on', amount, finite

            if dtype == 'staged':
                language = instance(lang_name, finite)
                eval_data = language.sample_data_as_FuncData(amount)
            elif dtype == 'uniform':
                eval_data = uniform_data(amount, 12)
            else:
                raise ValueError('unknown dtype: %s' % dtype)

            for h in topn:
                h.likelihood_temperature = temperature
                h.compute_posterior(eval_data)

            Z = logsumexp([h.posterior_score for h in topn])
            seq.append([Z, amount, finite, [h.posterior_score for h in topn]])

        dump(seq, open(dtype + '_posterior_seq' + suffix, 'w'))

    # _dir already points at a dumped posterior seq: reuse it
    else:
        seq = load(open(_dir))

    # ====================== compute KL based on seq =======================

    print 'compute kl seq...'
    kl_seq = []
    # drop the leading topn entry; the rest are [Z, amount, finite, scores] records
    topn = seq.pop(0)
    for i in xrange(len(seq) - 1):
        # KL divergence between consecutive posteriors, keyed by data amount
        kl_seq.append([seq[i][1], compute_kl2(seq[i], seq[i + 1])])

    dump(kl_seq, open(dtype + '_kl_seq' + suffix, 'w'))
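compute_kl2 is defined elsewhere in the script; given the [Z, amount, finite, [scores]] record format produced above, a plausible sketch of what it computes (an assumption about its behavior, not the original code) is the KL divergence between consecutive posteriors, normalizing each score vector by its log-normalizer Z:

from math import exp

def compute_kl2(rec_a, rec_b):
    # hypothetical sketch: KL(P_a || P_b) over the shared hypothesis list,
    # where rec_* = [Z, amount, finite, scores] and p_i = exp(scores_i - Z)
    Za, scores_a = rec_a[0], rec_a[3]
    Zb, scores_b = rec_b[0], rec_b[3]
    kl = 0.0
    for sa, sb in zip(scores_a, scores_b):
        p = exp(sa - Za)
        if p > 0:
            kl += p * ((sa - Za) - (sb - Zb))
    return kl

Under this reading, kl_seq pairs each data amount with the divergence to the next posterior in the sequence, which is what the dumped '_kl_seq' file contains.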